Forecasting Libraries

library(dplyr)
library(magrittr)
library(ggplot2)
library(forecast)
library(fpp)
library(readr)

## Locate the RStudio project root so file paths work regardless of the
## current working directory (uses the rprojroot package).
root <- rprojroot::find_rstudio_root_file()
## All input data files live under <project root>/Data.
dataDir <- file.path(root, 'Data')

This lab is created using material from Forecasting: Principles and Practice by Rob Hyndman. Please use the text to supplement anything covered here and beyond.

Forecastability

ts objects

A simple time series object is a collection of numbers with a corresponding time, which could be year, month, week, etc.

# A yearly series: the values 1..10 starting in 2000 (frequency defaults to 1).
y <- stats::ts(seq_len(10), start = 2000)
print(y)
## Time Series:
## Start = 2000 
## End = 2009 
## Frequency = 1 
##  [1]  1  2  3  4  5  6  7  8  9 10

If your time series’ observations happen more frequently than once per year, you can add to the frequency argument.

## Monthly: 12 observations per year, beginning January 2000
stats::ts(seq_len(100), start = c(2000, 1), frequency = 12)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2000   1   2   3   4   5   6   7   8   9  10  11  12
## 2001  13  14  15  16  17  18  19  20  21  22  23  24
## 2002  25  26  27  28  29  30  31  32  33  34  35  36
## 2003  37  38  39  40  41  42  43  44  45  46  47  48
## 2004  49  50  51  52  53  54  55  56  57  58  59  60
## 2005  61  62  63  64  65  66  67  68  69  70  71  72
## 2006  73  74  75  76  77  78  79  80  81  82  83  84
## 2007  85  86  87  88  89  90  91  92  93  94  95  96
## 2008  97  98  99 100
## Quarterly: 4 observations per year, beginning Q1 2000
stats::ts(seq_len(100), start = c(2000, 1), frequency = 4)
##      Qtr1 Qtr2 Qtr3 Qtr4
## 2000    1    2    3    4
## 2001    5    6    7    8
## 2002    9   10   11   12
## 2003   13   14   15   16
## 2004   17   18   19   20
## 2005   21   22   23   24
## 2006   25   26   27   28
## 2007   29   30   31   32
## 2008   33   34   35   36
## 2009   37   38   39   40
## 2010   41   42   43   44
## 2011   45   46   47   48
## 2012   49   50   51   52
## 2013   53   54   55   56
## 2014   57   58   59   60
## 2015   61   62   63   64
## 2016   65   66   67   68
## 2017   69   70   71   72
## 2018   73   74   75   76
## 2019   77   78   79   80
## 2020   81   82   83   84
## 2021   85   86   87   88
## 2022   89   90   91   92
## 2023   93   94   95   96
## 2024   97   98   99  100
## Weekly: 52.18 is the average number of weeks per year (365.25 / 7)
stats::ts(seq_len(100), start = 2000, frequency = 52.18)
## Time Series:
## Start = 2000 
## End = 2001.89727865082 
## Frequency = 52.18 
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [18]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34
##  [35]  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [52]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68
##  [69]  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
##  [86]  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100
## Load the raw CitiBike daily ridership file from the Data directory.
citiBike <- read_csv(file.path(dataDir, "citiBike.csv"))

## Aggregate total trips and miles over an arbitrary grouping
## (e.g. year + day). Returns a two-column data frame, rows ordered
## by the grouping variables (summarise sorts by group).
sumTripsMiles <- function(df, ...) {
    df %>%
        group_by(...) %>%
        summarise(trips = sum(`Trips over the past 24-hours (midnight to 11:59pm)`),
                  miles = sum(`Miles traveled today (midnight to 11:59 pm)`)) %>%
        ungroup() %>%
        select(trips, miles)
}

## Parse the date column once and derive day-of-year / week / month / year.
## NOTE: use the bare column name `Date` inside mutate() -- the original
## referenced citiBike$Date, which would silently misalign if the pipeline
## ever filtered or reordered rows before the mutate.
cbParsed <- citiBike %>%
    mutate(date = lubridate::mdy(Date),
           day = lubridate::yday(date),
           week = lubridate::week(date),
           month = lubridate::month(date),
           year = lubridate::year(date))

## Daily, weekly and monthly time series of trips (and miles where kept).
cbDay <- sumTripsMiles(cbParsed, year, day)
cbDay <- ts(cbDay, start = c(2015,1), frequency =365)
cbWeek <- sumTripsMiles(cbParsed, year, week)
cbWeek <- ts(cbWeek, start = 2015, frequency = 52)
cbMonth <- sumTripsMiles(cbParsed, year, month)
cbMonth <- ts(cbMonth[,"trips"], start = 2015, frequency = 12)

## Unaggregated daily series with friendlier column names.
newcb <- citiBike %>%
 rename(trips = `Trips over the past 24-hours (midnight to 11:59pm)`,
           miles = `Miles traveled today (midnight to 11:59 pm)`) %>%
 select(trips, miles)
cbts <- ts(newcb, start = 2015, frequency = 365)

Window function

Useful for subsetting your time series data by date.

You can either use the start date

## Keep only observations from 2018 onward.
cbWeek2 <- cbWeek %>% window(start = 2018)
autoplot(cbWeek2)

or end date, or both!

## Keep only observations up to (and including) the start of 2018.
cbWeek3 <- cbWeek %>% window(end = 2018)
autoplot(cbWeek3)

Frequency

If the frequency of observations is greater than once per week, then there is usually more than one way of handling the frequency. For example, data with daily observations might have a weekly seasonality (frequency = 7) or an annual seasonality (frequency = 365.25).

Similarly, data that are observed every minute might have an hourly seasonality (frequency = 60), a daily seasonality (frequency = 24×60 = 1440), a weekly seasonality (frequency = 24×60×7 = 10080) and an annual seasonality (frequency = 24×60×365.25 = 525960). If you want to use a ts object, then you need to decide which of these is the most important.

Plotting Time Series

## Weekly CitiBike series. The time axis of a ts plot is in years, so
## label it accordingly (the original "Day" label was left over from the
## daily series).
autoplot(cbWeek) + ggtitle("Citibike Trips") +
    xlab("Year") + ylab("Trips")

autoplot(fpp::a10) +
  ggtitle("Antidiabetic drug sales") +
  ylab("$ million") +
  xlab("Year")

Time Series Patterns

Trend a pattern exists when there is a long-term increase or decrease in the data. ex. Population growth

Seasonal a pattern exists when a series is influenced by seasonal factors (e.g., the quarter of the year, the month, or day of the week). ex. Ice cream sales

Cyclic a pattern exists when data exhibit rises and falls that are not of fixed period (duration usually of at least 2 years). ex. economic recovery

Irregular (random) no specific pattern exhibited, also called noise.

ACF plots

Autocorrelation is the strength of the relationship with previous observations of the data. Remember that normal correlation was the relationship between two variables? Autocorrelation is just the relationship the variable has with itself. Using the normal distribution as an assumption we’ll again score the correlations with Pearson’s correlation, between -1 and 1.

The Partial Autocorrelation at lag k is the correlation that results after removing the effect of any correlations due to the terms at shorter lags.

more about autocorrelation

## ACF of the weekly trips series.
cbWeek[, "trips"] %>% ggAcf()

Average method

forecasts can be as simple as the average or mean value of historical numbers. \[\hat{y}_{T+h|T}= \bar{y} = (y_1 + ... + y_T) / T \]

## Mean forecast: every future point is the historical average.
cbWeek[, "trips"] %>% meanf(h = 10)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2019.173       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.192       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.212       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.231       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.250       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.269       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.288       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.308       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.327       271963.4 119531.7 424395.1 38242.71 505684.1
## 2019.346       271963.4 119531.7 424395.1 38242.71 505684.1

Naive method

we can also set the forecast as the last observed number.

\[\hat{y}_{T+h|T} = y_T\]

cbWeek[, "trips"] %>% naive(h = 10)
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## 2019.173          75721    8360.523 143081.5  -27297.96 178740.0
## 2019.192          75721  -19541.100 170983.1  -69969.81 221411.8
## 2019.212          75721  -40950.768 192392.8 -102713.07 254155.1
## 2019.231          75721  -58999.953 210442.0 -130316.92 281758.9
## 2019.250          75721  -74901.605 226343.6 -154636.40 306078.4
## 2019.269          75721  -89277.797 240719.8 -176622.88 328064.9
## 2019.288          75721 -102498.070 253940.1 -196841.55 348283.5
## 2019.308          75721 -114803.200 266245.2 -215660.62 367102.6
## 2019.327          75721 -126360.430 277802.4 -233335.88 384777.9
## 2019.346          75721 -137291.531 288733.5 -250053.55 401495.6
# Alternatively: rwf() without drift is identical to naive()
cbWeek[, "trips"] %>% rwf(h = 10)
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## 2019.173          75721    8360.523 143081.5  -27297.96 178740.0
## 2019.192          75721  -19541.100 170983.1  -69969.81 221411.8
## 2019.212          75721  -40950.768 192392.8 -102713.07 254155.1
## 2019.231          75721  -58999.953 210442.0 -130316.92 281758.9
## 2019.250          75721  -74901.605 226343.6 -154636.40 306078.4
## 2019.269          75721  -89277.797 240719.8 -176622.88 328064.9
## 2019.288          75721 -102498.070 253940.1 -196841.55 348283.5
## 2019.308          75721 -114803.200 266245.2 -215660.62 367102.6
## 2019.327          75721 -126360.430 277802.4 -233335.88 384777.9
## 2019.346          75721 -137291.531 288733.5 -250053.55 401495.6

Seasonal naive method

Similarly to the naive method, the forecast is equal to the last observed number from the same season a year ago. m is the seasonal period and k is the integer part of (h - 1)/m \[\hat{y}_{T+h|T} = y_{T+h-m(k+1)}\]

cbWeek[, "trips"] %>% snaive(h = 10)
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 2019.173         222760 129216.3 316303.7  79697.31 365822.7
## 2019.192         217607 124063.3 311150.7  74544.31 360669.7
## 2019.212         247831 154287.3 341374.7 104768.31 390893.7
## 2019.231         204041 110497.3 297584.7  60978.31 347103.7
## 2019.250         222175 128631.3 315718.7  79112.31 365237.7
## 2019.269         204358 110814.3 297901.7  61295.31 347420.7
## 2019.288         272528 178984.3 366071.7 129465.31 415590.7
## 2019.308         221793 128249.3 315336.7  78730.31 364855.7
## 2019.327         331878 238334.3 425421.7 188815.31 474940.7
## 2019.346         325310 231766.3 418853.7 182247.31 468372.7

Drift method

cbWeek[, "trips"] %>% rwf(h = 10, drift = TRUE)
##          Point Forecast       Lo 80    Hi 80      Lo 95    Hi 95
## 2019.173       75759.22    8086.179 143432.3  -27737.76 179256.2
## 2019.192       75797.44  -20346.714 171941.6  -71242.35 222837.2
## 2019.212       75835.65  -42452.860 194124.2 -105071.02 256742.3
## 2019.231       75873.87  -61330.591 213078.3 -133962.25 285710.0
## 2019.250       75912.09  -78173.517 229997.7 -159741.51 311565.7
## 2019.269       75950.31  -93590.546 245491.2 -183340.05 335240.7
## 2019.288       75988.52 -107941.476 259918.5 -205308.14 357285.2
## 2019.308       76026.74 -121459.476 273513.0 -226002.36 378055.8
## 2019.327       76064.96 -134305.631 286435.5 -245669.09 397799.0
## 2019.346       76103.18 -146596.579 298802.9 -264486.71 416693.1
## Benchmark forecasts overlaid on the weekly CitiBike trips series.
## (The title and axis labels previously read "quarterly beer production"
## / "Megalitres" -- copied from the textbook example, not this data.)
autoplot(cbWeek[,"trips"]) +
  autolayer(meanf(cbWeek[,"trips"], h=11),
    series="Mean", PI=FALSE) +
  autolayer(naive(cbWeek[,"trips"], h=11),
    series="Naïve", PI=FALSE) +
  autolayer(snaive(cbWeek[,"trips"], h=11),
    series="Seasonal naïve", PI=FALSE) +
  ggtitle("Forecasts for weekly CitiBike trips") +
  xlab("Year") + ylab("Trips") +
  guides(colour=guide_legend(title="Forecast"))

Residuals

Here we can check how our models predictions are doing against the actual data.

## Overlay the seasonal-naive fitted values on the observed series.
autoplot(fitted(snaive(cbWeek[, "trips"])), series = "Fitted") +
    autolayer(cbWeek[, "trips"], series = "Data")

Now we can take a look at the residuals and plot them out.

## Residuals of the seasonal naive model.
res <- residuals(snaive(cbWeek[, "trips"]))

autoplot(res)

gghistogram(res, add.normal = TRUE)

We also want to check the ACF of residuals and expect no significant correlations. If there are, there is probably other information left in the residuals that should be used in computing forecasts. If we look at the residual time series plot, you can see that it is not staying around a mean. Our histogram does show normality, however, which gives us confidence in our prediction intervals.

res %>% ggAcf()

All together now

cbWeek[, "trips"] %>% snaive() %>% checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 44.922, df = 43.4, p-value = 0.4079
## 
## Model df: 0.   Total lags used: 43.4

Calendar Adjustment

The number of days per month are different so monthly numbers could be affected by them.

## Calendar adjustment: divide each monthly total by the number of days
## in that month to compare like-for-like daily averages.
monthDay <- cbind(Monthly = cbMonth, DailyAverage = cbMonth/monthdays(cbMonth))

## Spell out the full argument name `facets` (autoplot.mts); the
## original relied on partial matching of `facet`.
autoplot(monthDay, facets = TRUE)

Other Adjustments to consider:

  • Adjusting for inflation (using Consumer Price Index)
  • log transformations (useful for monetary variables)
  • power transformations (you can use square roots or cube roots)

Box-Cox Transformation

This transformation utilizes both logarithm and power transformation with a parameter called \(\lambda\).

\[w_t = \begin{cases} \log(y_t), &\text{if } \lambda = 0 \\ \dfrac{y_t^\lambda-1}{\lambda}, &\text{otherwise. } \end{cases}\]

you can see that if \(\lambda = 0\) then the natural log is used, but if \(\lambda \neq 0\) the power transformation is used. One more thing of note is that if \(\lambda = 1\) then \(w_t = y_t -1\) which means the data will be shifted down.

## Estimate lambda from the data, then plot the Box-Cox transformed series.
lambdaVal <- BoxCox.lambda(monthDay)
monthDay %>% BoxCox(lambdaVal) %>% autoplot()

Bias Adjustment

When we get the back-transformed forecast from the Box-Cox transformation, it will return the median of the forecast distribution rather than the mean. The difference between the mean and the median is called the bias, hence bias-adjusted point forecasts.

## Random walk with drift on the log scale (lambda = 0). The naive
## back-transformation returns the *median* of the forecast distribution.
fc <- rwf(monthDay[,"Monthly"], drift=TRUE, lambda=0)
## biasadj = TRUE applies the bias adjustment so point forecasts are means.
fc2 <- rwf(monthDay[,"Monthly"], drift=TRUE, lambda=0, biasadj=TRUE)
autoplot(monthDay[,"Monthly"]) +
  autolayer(fc, series="Simple back transformation") +
  autolayer(fc2, series="Bias adjusted", PI=FALSE) +
  guides(colour=guide_legend(title="Forecast")) + ylab("Trips")

Train and Test

## Hold out the end of the weekly series for testing.
train <- window(cbWeek[,"trips"], end = 2018.7)
test <- window(cbWeek[,"trips"], start = 2018.7)

fc1 <- meanf(train, h = 20)
fc2 <- rwf(train, h = 20)
## snaive() has no drift argument; the original `drift = TRUE` was
## silently ignored (its accuracy matched plain snaive() exactly).
fc3 <- snaive(train, h = 20)
accuracy(fc1, test)
##                         ME     RMSE       MAE       MPE     MAPE     MASE
## Training set -9.299495e-12 116405.3  96525.54 -62.34543 86.69783 1.558831
## Test set      6.448302e+04 135813.3 111716.24 -51.19888 92.47630 1.804152
##                   ACF1 Theil's U
## Training set 0.8948701        NA
## Test set     0.7704272 0.2436112
fc2 %>% accuracy(test)
##                      ME      RMSE       MAE        MPE      MAPE      MASE
## Training set    2189.37  49017.01  37312.34  -11.25874  28.27481 0.6025726
## Test set     -155826.35 196390.06 155826.35 -175.71696 175.71696 2.5165051
##                    ACF1 Theil's U
## Training set -0.2144900        NA
## Test set      0.7704272  1.233567
fc3 %>% accuracy(test)
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 48771.15 73470.43 61921.73 -1.462765 41.56140 1.0000000
## Test set     25708.95 72287.52 58162.45 -0.488595 32.60272 0.9392898
##                     ACF1 Theil's U
## Training set 0.264827395        NA
## Test set     0.003218028 0.3741866
## Plot training and test portions together with each benchmark's point
## forecasts (prediction intervals suppressed via PI = FALSE).
autoplot(cbind(training = train, test = test)) + 
    autolayer(fc1, series = "meanf", PI = FALSE)+ 
    autolayer(fc2, series = "rwf", PI = FALSE)+ 
    autolayer(fc3, series = "snaive", PI = FALSE)

Time series Cross Validation

## One-step time series cross-validation with the seasonal naive method.
## (snaive() has no drift option; the original `drift = TRUE` was a no-op.)
e <- tsCV(train, forecastfunction = snaive, h = 1)

## Cross-validated RMSE
sqrt(mean(e^2, na.rm = TRUE))
## [1] 101985.1
res <- residuals(snaive(train))

## In-sample (residual) RMSE -- typically optimistic compared to CV.
sqrt(mean(res^2, na.rm = TRUE))
## [1] 73470.43

comparing results.

fc6 <- snaive(train, h =20)

## NOTE(review): forecast(e, ...) forecasts the series of CV *errors*,
## not the trips series itself -- confirm this is the intended comparison.
autoplot(train) + 
    autolayer(forecast(e, h = 20), series = "tscv",PI = FALSE)+
    autolayer(fc6, series = "snaive", PI = FALSE) + 
    autolayer(test, series = "test")

e %>% forecast(h = 20) %>% accuracy(test)
##                       ME      RMSE       MAE      MPE     MAPE      MASE
## Training set   -733.5709  47893.08  35843.06  51.6275 118.9048 0.4601536
## Test set     302747.9769 314406.65 302747.98 100.1145 100.1145 3.8866815
##                   ACF1 Theil's U
## Training set 0.1293341        NA
## Test set     0.5941300  1.202097
fc6 %>% accuracy(test)
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 48771.15 73470.43 61921.73 -1.462765 41.56140 1.0000000
## Test set     25708.95 72287.52 58162.45 -0.488595 32.60272 0.9392898
##                     ACF1 Theil's U
## Training set 0.264827395        NA
## Test set     0.003218028 0.3741866

ETS

There are 15 different kinds of exponential smoothing algorithms

Simple exponential smoothing

if we consider the naive method… \[\hat y_{T+h|T} = y_T \] the last observed value will be our forecast.

If we consider the average method… \[\hat y_{T+h|T} = \frac{1}{T} \sum_{t=1}^T y_t\] all observations are equally weighted.

To get something in between these extremes exponential smoothing can be employed… \[\hat y_{T+1|T} = \alpha y_T + \alpha(1-\alpha) y_{T-1}+ \alpha(1-\alpha)^2 y_{T-2} + ...\] We add a new parameter called \(\alpha\), with \(0 \leq \alpha \leq 1\). You can see the most recent observation \(\alpha y_T\) gets the full weight \(\alpha\), the next one \(\alpha(1-\alpha) y_{T-1}\) is discounted by \((1-\alpha)\), the one after that by \((1-\alpha)^2\), and so on, so older and older values of the time series are weighted less and less.

An alternative representation is the component form. We only have 1 component for smoothing, later we will look at seasonality and trend.

We can think of the component as a level, and the algorithm “learns” the new level from the data. you have to initialize \(l_1\) with some value and often it can be the first value.

## Simple exponential smoothing on the annual oil data (fpp2), 5 steps ahead.
oildata <- window(fpp2::oil, start=1996)
fc <- forecast::ses(oildata, h = 5)
## Training-set accuracy, rounded for display.
round(forecast::accuracy(fc), 2)
##               ME  RMSE   MAE MPE MAPE MASE  ACF1
## Training set 6.4 28.12 22.26 1.1 4.61 0.93 -0.03

Additive approaches are appropriate when the seasonal effects are roughly constant through the series; multiplicative approaches are appropriate when the seasonal effects scale with the level of the series.

Trend Methods

The simple exponential smoothing model can be extended by taking into account trends. There are three equations, one for the forecast, two for smoothing. \[\begin{align*} \hat{y}_{t+h|t} &= \ell_{t} + hb_{t} \\ \ell_{t} &= \alpha(y_{t} - s_{t-m}) + (1 - \alpha)(\ell_{t-1} + b_{t-1})\\ b_{t} &= \beta^*(\ell_{t} - \ell_{t-1}) + (1 - \beta^*)b_{t-1}\\ \end{align*} \]

## Holt's linear trend method on 1990-2004 air passengers, 5 steps ahead.
airTrain <- window(ausair, start = 1990, end = 2004)
autoplot(holt(airTrain, h = 5, PI = FALSE))

Damped Trends

the Holt’s method forecasts are linear and indefinitely increasing or decreasing into the future. Damped forecasts will flatten the line over time.

## Compare level-only (SES), linear-trend (Holt) and damped-trend
## forecasts on the same series.
air <- window(ausair, start=1990)
fcs <- forecast::ses(air, h = 15)
fc <- holt(air, h=15)
## phi < 1 flattens the trend as the horizon grows.
fc2 <- holt(air, damped=TRUE, phi = 0.9, h=15)
autoplot(air) +
    autolayer(fcs, series="SES", PI=FALSE) +
    autolayer(fc, series="Holt's method", PI=FALSE) +
    autolayer(fc2, series="Damped Holt's method", PI=FALSE) +
    ggtitle("Forecasts from Holt's method") + xlab("Year") +
    ylab("Air passengers in Australia (millions)") +
    guides(colour=guide_legend(title="Forecast"))

Seasonal Methods

The next component to add is seasonality. We are adding one more smoothing equation for \(s_t\). We will use \(m\) to denote the frequency of the seasonality, so quarterly seasonality will be \(m = 4\) and monthly would be \(m = 12\).

Additive Method

\[\begin{align*} \hat{y}_{t+h|t} &= \ell_{t} + hb_{t} + s_{t+h-m(k+1)} \\ \ell_{t} &= \alpha(y_{t} - s_{t-m}) + (1 - \alpha)(\ell_{t-1} + b_{t-1})\\ b_{t} &= \beta^*(\ell_{t} - \ell_{t-1}) + (1 - \beta^*)b_{t-1}\\ s_{t} &= \gamma (y_{t}-\ell_{t-1}-b_{t-1}) + (1-\gamma)s_{t-m}, \end{align*} \]

Multiplicative Method

\[\begin{align*} \hat{y}_{t+h|t} &= (\ell_{t} + hb_{t})s_{t+h-m(k+1)} \\ \ell_{t} &= \alpha \frac{y_{t}}{s_{t-m}} + (1 - \alpha)(\ell_{t-1} + b_{t-1})\\ b_{t} &= \beta^*(\ell_{t}-\ell_{t-1}) + (1 - \beta^*)b_{t-1} \\ s_{t} &= \gamma \frac{y_{t}}{(\ell_{t-1} + b_{t-1})} + (1 - \gamma)s_{t-m} \end{align*} \]

## Holt-Winters on the monthly CitiBike trips series, comparing additive
## and multiplicative seasonality over a 40-month horizon.
## (Title/labels previously referenced Australian visitor nights --
## copied from the textbook example, not this data.)
trips <- window(cbMonth)

fit1 <- hw(trips,seasonal="additive", h = 40)
fit2 <- hw(trips,seasonal="multiplicative", h = 40)
autoplot(trips) +
  autolayer(fit1, series="HW additive forecasts", PI=FALSE) +
  autolayer(fit2, series="HW multiplicative forecasts",
    PI=FALSE) +
  xlab("Year") +
  ylab("Trips") +
  ggtitle("Monthly CitiBike trips") +
  guides(colour=guide_legend(title="Forecast"))

## Level / trend / seasonal state components of each fit.
plot(fit1$model$states[,1:3], col = "blue", main = "Additive Components")

plot(fit2$model$states[,1:3], col = "blue", main = "Multiplicative Components")

ETS

ARIMA Models

Stationarity & Differencing

A stationary time series is one whose properties do not depend on the time at which the series is observed. This means that time series with trends and seasonality are not considered to be stationary. Remember cyclical patterns? Those are considered stationary series. A stationary series will have no predictable pattern in the long run.

  • roughly horizontal
  • constant variance
  • no patterns predictable in the long-term

Differencing is when you compute the differences between consecutive observations. This is another form of transformation that can turn non-stationary data into stationary. This technique can help stabilize the mean of a time series by removing changes in the level (removing the trend and seasonality).

Occasionally the differenced data is not stationary and may need to be differenced a second time. This is called second-order differencing.

autoplot(cbDay[, "trips"])

autoplot(log(cbDay[, "trips"]))

## Seasonal (lag-365) difference of the logged series.
autoplot(diff(log(cbDay[, "trips"]), lag = 365))

## Seasonal difference followed by a first difference.
autoplot(diff(diff(log(cbDay[, "trips"]), lag = 365), lag = 1))

Autoregressive models

Compared to a multiple regression model, where a forecast is made using a linear combination of predictors, the autoregressive model uses a linear combination of past values of the variable. Remember, for time series forecasting so far we have only used one variable and its past values to predict the future.

An autoregressive model of order \(p\) can be written as:

\[y_{t} = c + \phi_{1}y_{t-1} + \phi_{2}y_{t-2} + \dots + \phi_{p}y_{t-p} + \varepsilon_{t}\] The \(p\) is for the order of the autoregressive model.

Moving average models

Unlike the autoregressive model, a moving average model uses past forecast errors in a regression-like model.

\[y_{t} = c + \varepsilon_t + \theta_{1}\varepsilon_{t-1} + \theta_{2}\varepsilon_{t-2} + \dots + \theta_{q}\varepsilon_{t-q},\]

In the moving average case \(q\) is the order of the model.

Non-seasonal ARIMA

Non-seasonal ARIMA models are when you combine differencing with autoregression and a moving average model. ARIMA stands for AutoRegressive Integrated Moving Average, where integration is the reverse of differencing.

\[ y'_{t} = c + \phi_{1}y'_{t-1} + \cdots + \phi_{p}y'_{t-p} + \theta_{1}\varepsilon_{t-1} + \cdots + \theta_{q}\varepsilon_{t-q} + \varepsilon_{t}\]

The model has both lagged values of \(y_t\) and lagged errors.

The constant \(c\) has an important effect on the long-term forecasts obtained from these models. The value of \(d\) also has an effect on the prediction intervals — the higher the value of \(d\), the more rapidly the prediction intervals increase in size. For
\(d=0\), the long-term forecast standard deviation will go to the standard deviation of the historical data, so the prediction intervals will all be essentially the same.

  • If \(c=0\) and \(d=0\), the long-term forecasts will go to zero
  • If \(c=0\) and \(d=1\), the long-term forecasts will go to a non-zero constant.
  • If \(c=0\) and \(d=2\), the long-term forecasts will follow a straight line.
  • If \(c\ne0\) and \(d=0\), the long-term forecasts will go to the mean of the data.
  • If \(c\ne0\) and \(d=1\), the long-term forecasts will follow a straight line.
  • If \(c\ne0\) and \(d=2\), the long-term forecasts will follow a quadratic trend.

Example 1

## First-differenced daily trips.
dayDiff <- diff(cbDay[, "trips"], lag = 1)
autoplot(dayDiff)

fit <- auto.arima(dayDiff, seasonal = FALSE)
fit
## Series: cbDay[, "trips"] %>% diff(lag = 1) 
## ARIMA(1,0,5) with zero mean 
## 
## Coefficients:
##          ar1      ma1      ma2     ma3      ma4     ma5
##       0.3670  -0.8963  -0.0476  0.0372  -0.1120  0.1345
## s.e.  0.1099   0.1090   0.0694  0.0405   0.0389  0.0252
## 
## sigma^2 estimated as 95583426:  log likelihood=-15812.13
## AIC=31638.27   AICc=31638.35   BIC=31675.42

\[y_t = c + 0.3670 y_{t-1} -0.8963 \varepsilon_{t-1} -0.0476 \varepsilon_{t-2} + 0.0372\varepsilon_{t-3} -0.1120\varepsilon_{t-4} +0.1345 \varepsilon_{t-5} + \varepsilon_{t},\] \(c=0\) since the mean is zero, and the standard deviation of \(\varepsilon_t\) is \(9776.678 = \sqrt{95583426}\)

autoplot(forecast(fit, h = 10), include = 80)

Example 2

## Lag-52 (annual) then lag-1 difference of the weekly trips series.
wkDiff <- diff(diff(cbWeek[, "trips"], lag = 52), lag = 1)
autoplot(wkDiff)

fit <- auto.arima(wkDiff, seasonal = FALSE)
fit
## Series: cbWeek[, "trips"] %>% diff(lag = 52) %>% diff(lag = 1) 
## ARIMA(1,0,1) with zero mean 
## 
## Coefficients:
##          ar1      ma1
##       0.1952  -0.9649
## s.e.  0.0822   0.0224
## 
## sigma^2 estimated as 3.183e+09:  log likelihood=-2027.09
## AIC=4060.17   AICc=4060.32   BIC=4069.47

In ARIMA(\(p,d,q\)):

Usually determining \(p\): - The ACF is exponentially decaying or sinusoidal; - There is a significant spike at lag \(p\) in the PACF, but none beyond lag \(p\)

Usually determining \(q\): - The PACF is exponentially decaying or sinusoidal; - There is a significant spike at lag \(q\) in the ACF, but none beyond lag \(q\)

ggAcf(diff(diff(cbWeek[, "trips"], lag = 52), lag = 1),
      main = "CitiBike Trips per Week")

ggPacf(diff(diff(cbWeek[, "trips"], lag = 52), lag = 1),
       main = "CitiBike Trips per Week")

seasonal ARIMAs

ggtsdisplay(cbMonth[, "trips"])

## Let auto.arima search over seasonal terms as well.
fit3 <- auto.arima(cbMonth[, "trips"], seasonal = TRUE)
fit3
## Series: cbMonth[, "trips"] 
## ARIMA(0,1,1)(0,1,0)[12] 
## 
## Coefficients:
##           ma1
##       -0.8062
## s.e.   0.1136
## 
## sigma^2 estimated as 2.27e+10:  log likelihood=-480.32
## AIC=964.65   AICc=965.01   BIC=967.82
fit3 %>% checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(0,1,0)[12]
## Q* = 10.508, df = 8.8, p-value = 0.2943
## 
## Model df: 1.   Total lags used: 9.8
autoplot(forecast(fit3, h = 12))

ARIMA vs ETS

While linear exponential smoothing models are all special cases of ARIMA models, the non-linear exponential smoothing models have no equivalent ARIMA counterparts. On the other hand, there are also many ARIMA models that have no exponential smoothing counterparts. In particular, all ETS models are non-stationary, while some ARIMA models are stationary.

The AICc is useful for selecting between models in the same class. For example, we can use it to select an ARIMA model between candidate ARIMA models17 or an ETS model between candidate ETS models. However, it cannot be used to compare between ETS and ARIMA models because they are in different model classes, and the likelihood is computed in different ways

## One-step tsCV comparison of ETS vs auto.arima (refits a model at every
## forecast origin, so this is slow).
e1 <- tsCV(y = cbMonth[,"trips"], function(y,h) ets(y) %>% forecast(h = h), h=1)
# Compute CV errors for ARIMA as e2
e2 <- tsCV(y = cbMonth[,"trips"], function(y,h) auto.arima(y) %>% forecast(h = h), h=1)
# Find MSE of each model class
mean(e1^2, na.rm=TRUE)
## [1] 55905603984
mean(e2^2, na.rm=TRUE)
## [1] 67725649568
## ETS has the lower CV MSE here, so forecast with it.
cbMonth[,"trips"] %>% ets() %>% forecast() %>% autoplot()

## Differenced daily trips; hold out everything after the start of 2018.
dayTrips <- diff(cbDay[, "trips"], lag = 1)
# Use 3 years of the data as the training set
train <- window(dayTrips, end = c(2018, 1))
fit.arima <- auto.arima(train)
fit.arima %>% checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,0,1) with zero mean
## Q* = 757.39, df = 213, p-value < 2.2e-16
## 
## Model df: 6.   Total lags used: 219
## Fit an ETS model to the same training window for comparison.
fit.ets <- ets(train)
fit.ets %>% checkresiduals()

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 1948.8, df = 217, p-value < 2.2e-16
## 
## Model df: 2.   Total lags used: 219
## Out-of-sample accuracy of the ARIMA and ETS fits over a 10-step
## horizon, evaluated against the held-out portion of dayTrips.
a1 <- fit.arima %>% forecast(h = 10) %>%
  accuracy(dayTrips)
a1[,c("RMSE","MAE","MAPE","MASE")]
##                  RMSE      MAE     MAPE      MASE
## Training set 8824.870 6405.411      Inf 0.6261876
## Test set     9271.098 6994.791 127.4045 0.6838049
a2 <- fit.ets %>% forecast(h = 10) %>%
  accuracy(dayTrips)
a2[,c("RMSE","MAE","MAPE","MASE")]
##                  RMSE      MAE     MAPE      MASE
## Training set 10274.34 7134.381      Inf 0.6974511
## Test set      9593.40 6992.226 100.0162 0.6835542
## Final model: refit on the full differenced series and forecast 10 days.
dayTrips %>% auto.arima() %>% forecast(h = 10) %>% autoplot()